In [3]:
import pylearn2.utils
import pylearn2.config
import theano
import neukrill_net.dense_dataset
import neukrill_net.utils
import numpy as np
%matplotlib inline
import matplotlib.pyplot as plt
#import holoviews as hl
#%load_ext holoviews.ipython
import sklearn.metrics
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/quicker_learning_onelessbig_layer_experiment.pkl")
In [7]:
channel = m.monitor.channels["valid_y_y_1_nll"]
plt.plot(channel.example_record,channel.val_record)
Out[7]:
In [8]:
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/quicker_learning_onelessbig_layer_experiment_recent.pkl")
In [9]:
channel = m.monitor.channels["valid_y_y_1_nll"]
plt.plot(channel.example_record,channel.val_record)
Out[9]:
The validation NLL runs into problems around epoch 10.
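To look at the failure more closely, we can plot the same channel against epochs and mark epoch 10. This is just a quick sketch; it assumes the channel's epoch_record is populated, as pylearn2's monitor normally does alongside val_record.
In [ ]:
channel = m.monitor.channels["valid_y_y_1_nll"]
plt.plot(channel.epoch_record, channel.val_record)
plt.axvline(10, color="r", linestyle="--")  # mark the suspect epoch
plt.xlabel("epoch")
plt.ylabel("valid_y_y_1_nll")
plt.show()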
In [8]:
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/quicker_learning_1_fc_layer_experiment_recent.pkl")
In [37]:
channel = m.monitor.channels["learning_rate"]
plt.plot(channel.val_record)
Out[37]:
The learning rate is not the cause.
In [40]:
cs = [c for c in m.monitor.channels if "norm" in c]
In [41]:
for c in cs:  # m.monitor.channels:
    channel = m.monitor.channels[c]
    plt.title(c)
    plt.plot(channel.val_record)
    plt.grid(True)
    plt.show()
In [28]:
vals_at_e10 = [(c, [float(v) for v in m.monitor.channels[c].val_record[8:12]]) for c in cs]
print(vals_at_e10)
We're going to change max_kernel_norm in the convolutional layers to 0.5 to see whether it explains the failure at epoch 10.
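As a sketch of what this change amounts to, here is the equivalent layer construction in Python (in practice the change goes into the experiment's YAML; the other parameter values below are illustrative, not copied from the actual config):
In [ ]:
from pylearn2.models.mlp import ConvRectifiedLinear

# Illustrative conv layer with the kernel norm constraint under test.
h1 = ConvRectifiedLinear(
    layer_name='h1',
    output_channels=48,    # illustrative, not the experiment's value
    kernel_shape=[5, 5],   # illustrative
    pool_shape=[2, 2],
    pool_stride=[2, 2],
    irange=0.05,           # illustrative
    max_kernel_norm=0.5)   # the constraint being tested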
In [64]:
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/experiment_maxkernelnorm_recent.pkl")
In [65]:
channel = m.monitor.channels["valid_y_y_1_nll"]
plt.plot(channel.val_record)
Out[65]:
In [51]:
channel = m.monitor.channels["train_h1_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h2_kernel_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_h3_kernel_norms_max"]
plt.plot(channel.val_record)
Out[51]:
In [68]:
channel = m.monitor.channels["train_h4_col_norms_min"]
plt.plot(channel.val_record)
Out[68]:
It seems things broke again once kernel_norms_max on the convolutional layers reached 0.5.
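To check this, a small sketch that finds where each convolutional layer's max kernel norm first reaches the 0.5 cap (channel names as used above; the tolerance is arbitrary):
In [ ]:
for name in ["train_h1_kernel_norms_max",
             "train_h2_kernel_norms_max",
             "train_h3_kernel_norms_max"]:
    vals = np.array([float(v) for v in m.monitor.channels[name].val_record])
    hit = np.where(vals >= 0.5 - 1e-3)[0]
    print(name, "first reaches the cap at step",
          hit[0] if hit.size else "never")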
In [59]:
channel = m.monitor.channels["train_y_y_1_col_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_y_y_2_col_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_y_y_3_col_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_y_y_4_col_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_y_y_5_col_norms_max"]
plt.plot(channel.val_record)
channel = m.monitor.channels["train_y_y_6_col_norms_max"]
plt.plot(channel.val_record)
Out[59]:
We now think that the col_norms in the fully connected layer start out wrong because of the initial weights (set by istdev), and the kernel_norms in the convolutional layers are apparently the analogous quantity. The practical advice we found is to set max_col_norm to 80% of the col_norms_mean value that the layer saturates at when no max is specified.
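A sketch of that heuristic, computed from the monitor of an unconstrained run: approximate the saturation value by averaging the last few entries of a layer's col_norms_mean channel and take 80% of it. The layer names below are assumptions; substitute whatever layers the model actually has.
In [ ]:
def suggested_max_col_norm(monitor, channel_name, tail=5, fraction=0.8):
    # Approximate the saturation value by the mean of the last few records.
    vals = [float(v) for v in monitor.channels[channel_name].val_record]
    return fraction * np.mean(vals[-tail:])

for name in ["train_h4_col_norms_mean", "train_h5_col_norms_mean"]:  # assumed layer names
    if name in m.monitor.channels:
        print(name, "-> suggested max_col_norm:",
              suggested_max_col_norm(m.monitor, name))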
This is the model with a bigger first convolutional layer (128 output channels) and a changed decay factor (which didn't actually do what I wanted it to). It is primarily being kept running to evaluate the importance of the big convolutional layer.
In [69]:
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/experiment_biglayer_largedecay_recent.pkl")
In [54]:
channel = m.monitor.channels["valid_y_y_1_nll"]
plt.plot(channel.val_record)
Out[54]:
In [72]:
channel = m.monitor.channels["train_h4_col_norms_min"]
plt.plot(channel.val_record)
Out[72]:
In [60]:
m = pylearn2.utils.serial.load(
"/disk/scratch/neuroglycerin/models/experiment_biglayer_largedecay.pkl")
In [61]:
channel = m.monitor.channels["valid_y_y_1_nll"]
plt.plot(channel.val_record)
Out[61]: